/* Copyright 2012 Tobias Marschall
 *
 * This file is part of CLEVER.
 *
 * CLEVER is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * CLEVER is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with CLEVER.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <iostream>

#include <boost/program_options.hpp>

#include <bamtools/api/BamReader.h>
#include <bamtools/api/BamWriter.h>

#include "ASTagCalculator.h"
#include "ThreadPool.h"
#include "VersionInfo.h"

using namespace std;
namespace po = boost::program_options;

void usage(const char* name, const po::options_description& options_desc) {
	cerr << "Usage: " << name << " [options] <reference.fasta>" << endl;
	cerr << endl;
	cerr << "Reads BAM format from stdin and outputs BAM format to stdout, where " << endl;
	cerr << "AS tags containing a (re-)computed alignment scores have been added." << endl;
	cerr << "The alignment scores are the sum of base qualities at mismatch positions" << endl;
	cerr << "as extracted from the phred string." << endl;
	cerr << endl;
	cerr << "If given filename ends on \".gz\", its contents is unzipped." << endl;
	cerr << endl;
	cerr << "NOTE: The first word of the sequence names in <reference.fasta> is used," << endl;
	cerr << "i.e. if fasta line is " << endl;
	cerr << ">chr1 xyz" << endl;
	cerr << "then only \"chr1\" is used and searched for in the BAM/SAM file." << endl;
	cerr << endl;
	cerr << options_desc << endl;
	exit(1);
}

typedef struct work_package_t {
	BamTools::BamAlignment* read_aln;
	const ASTagCalculator& as_tag_calculator;
	const BamTools::RefVector& bam_ref_data;
	ASTagCalculator::stats_t stats;
	bool success;
	
	work_package_t(BamTools::BamAlignment* read_aln, const ASTagCalculator& as_tag_calculator, const BamTools::RefVector& bam_ref_data) : read_aln(read_aln), as_tag_calculator(as_tag_calculator), bam_ref_data(bam_ref_data), stats(), success(false) {}
	
	void run() {
		assert(read_aln != 0);
		success = as_tag_calculator.computeTag(*read_aln, bam_ref_data, &stats);
	}
} work_package_t;

typedef struct output_writer_t {
	BamTools::BamWriter& bam_writer;
	ASTagCalculator::stats_t stats;
	bool skip_unknown;
	
	output_writer_t(BamTools::BamWriter& bam_writer, bool skip_unknown) : bam_writer(bam_writer), stats(), skip_unknown(skip_unknown) {}
	
	void write(auto_ptr<work_package_t> work) {
		assert(work->read_aln != 0);
		if (work->success) {
			bam_writer.SaveAlignment(*work->read_aln);
		} else {
			if (!skip_unknown) {
				bam_writer.SaveAlignment(*work->read_aln);
			}
		}
		stats += work->stats;
		delete work->read_aln;
	}
} output_writer_t;

int main(int argc, char* argv[]) {
	VersionInfo::checkAndPrintVersion("add-score-tags-to-bam", cerr);
	string commandline = VersionInfo::commandline(argc, argv);

	// PARAMETERS
	int phred_offset;
	int bad_score_threshold;
	bool skip_unknown = false;
	int threads;

	po::options_description options_desc("Allowed options");
	options_desc.add_options()
		("phred_offset,p", po::value<int>(&phred_offset)->default_value(33), "Value to substract from ASCII code to get the PHRED quality.")
		("bad_alignment_threshold,b", po::value<int>(&bad_score_threshold)->default_value(1000), "Issue a warning when AS tag is above this value.")
		("skip_unknown,s", po::value<bool>(&skip_unknown)->zero_tokens(), "Do not output reads for which no AS tag could be computed, e.g. because the reference sequence was unknown.")
		("threads,T", po::value<int>(&threads)->default_value(0), "Number of threads (default: 0 = strictly single-threaded).")
	;

	if (isatty(fileno(stdin)) || (argc<2)) {
		usage(argv[0], options_desc);
	}
	string reference_filename(argv[argc-1]);
	argc -= 1;

	po::variables_map options;
	try {
		po::store(po::parse_command_line(argc, argv, options_desc), options);
		po::notify(options);
	} catch(exception& e) {
		cerr << "error: " << e.what() << "\n";
		return 1;
	}
	cerr << "Commandline: " << commandline << endl;

	ASTagCalculator tag_calculator(reference_filename,phred_offset,bad_score_threshold);
	cerr << "Done reading reference sequences" << endl;

	BamTools::BamReader bam_reader;
	if (!bam_reader.Open("/dev/stdin")) {
		cerr << "Error opening BAM input from /dev/stdin" << endl;
		return 1;
	}
	const BamTools::RefVector& bam_ref_data = bam_reader.GetReferenceData();
	const BamTools::SamHeader sam_header = bam_reader.GetHeader();
	BamTools::BamWriter bam_writer;
	if (!bam_writer.Open("/dev/stdout", sam_header, bam_ref_data)) {
		cerr << "Error writing BAM to stdout." << endl;
		return 1;
	}
	
	output_writer_t output_writer(bam_writer, skip_unknown);
	typedef ThreadPool<work_package_t,output_writer_t> thread_pool_t;
	thread_pool_t* thread_pool = new thread_pool_t(threads, 1000, threads, output_writer);
	
	auto_ptr<BamTools::BamAlignment> read_aln(new BamTools::BamAlignment);
	long long counter = 0;
	while ( bam_reader.GetNextAlignment(*(read_aln.get())) ) {
		counter += 1;
		if (counter % 200000 == 0) {
			cerr << "Having processed " << counter << " read alignments" << endl;
		}
		thread_pool->addTask(auto_ptr<work_package_t>(new work_package_t(read_aln.release(), tag_calculator, bam_ref_data)));
		read_aln = auto_ptr<BamTools::BamAlignment>(new BamTools::BamAlignment);
	}
	delete thread_pool;
	tag_calculator.printWarnings(cerr, output_writer.stats);
	return 0;
}
